import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Append the first command-line argument to the module search path. The same
# argument is also used as the directory that holds the input CSV.
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle

# Load the dataset from the directory given as the first command-line argument.
laptops_price = pd.read_csv(os.path.join(sys.argv[1], 'laptops_price.csv'))

# Factor applied to the raw 'Price' column before the 800-1500 price filter.
price_conversion_rate = 1e-4

# Pounds per kilogram, used to derive the 'Weight_pounds' column.
KG_TO_POUNDS = 2.20462

# Strip the 'kg' unit suffix and convert to float. regex=False forces a literal
# replacement regardless of the installed pandas version (the default value of
# `regex` changed in pandas 2.0). Values that are still not numeric become NaN.
laptops_price['Weight'] = pd.to_numeric(
    laptops_price['Weight'].str.replace('kg', '', regex=False),
    errors='coerce',
)

# Drop rows whose weight could not be parsed. The explicit copy guarantees that
# the column assignments below write to an independent frame, so pandas has no
# reason to emit SettingWithCopyWarning.
laptops_price = laptops_price.dropna(subset=['Weight']).copy()

# Convert weight to pounds
laptops_price['Weight_pounds'] = laptops_price['Weight'] * KG_TO_POUNDS

# Apply the price conversion rate
laptops_price['Price'] = laptops_price['Price'] * price_conversion_rate

# Parse the screen size (strip the inch mark). Unparseable entries become NaN,
# which fails the between() test and is therefore excluded instead of aborting
# the whole script.
screen_size_inches = pd.to_numeric(
    laptops_price['Screen Size'].str.replace('"', '', regex=False),
    errors='coerce',
)

# Filter the dataset: at most 3 lb, 13-15 inch screen, price between 800 and 1500.
filtered_laptops = laptops_price[
    (laptops_price['Weight_pounds'] <= 3)
    & screen_size_inches.between(13, 15)
    & laptops_price['Price'].between(800, 1500)
]

# Select the required columns (without 'Storage'). The copy detaches the result
# from `laptops_price` so that columns can be added to it safely later on.
filtered_laptops = filtered_laptops[[
    'Manufacturer', 'Model Name', 'Category', 'Screen Size', 'Screen', 'CPU',
    'RAM', 'GPU', 'Operating System', 'Operating System Version', 'Weight',
    'Price',
]].copy()

print(filtered_laptops)
# pickle.dump(filtered_laptops,open("./ref_result/filtered_laptops.pkl","wb"))




import pandas as pd   
import pickle
import matplotlib.pyplot as plt

# This section works on the `filtered_laptops` frame built earlier in the file.

# Define powerful CPUs (you can modify the list based on the desired CPUs)
powerful_cpus = [
    "Intel Core i7", "Intel Core i9", "AMD Ryzen 7", "AMD Ryzen 9"
]


def _has_powerful_cpu(cpu_name):
    """Return True if *cpu_name* is a string containing any entry of ``powerful_cpus``.

    Non-string values (e.g. NaN for a missing CPU) are treated as not powerful
    instead of raising ``TypeError`` on the substring test.
    """
    return isinstance(cpu_name, str) and any(
        cpu in cpu_name for cpu in powerful_cpus
    )


# Flag powerful CPUs. assign() returns a new frame, so the column is added
# without writing into a slice of another DataFrame (no SettingWithCopyWarning).
filtered_laptops = filtered_laptops.assign(
    Powerful_CPU=filtered_laptops["CPU"].map(_has_powerful_cpu).astype(bool)
)

# Parse RAM such as "8GB" into a number. Missing or malformed entries become
# NaN, which compares False against the 8 GB threshold instead of raising.
ram_gb = pd.to_numeric(
    filtered_laptops["RAM"].str.replace("GB", "", regex=False),
    errors="coerce",
)

# Filter the dataset for powerful CPUs and at least 8GB of RAM
powerful_laptops = filtered_laptops[
    filtered_laptops["Powerful_CPU"] & (ram_gb >= 8)
]

# Count the number of laptops by manufacturer
laptop_counts = powerful_laptops["Manufacturer"].value_counts()

# Create a bar chart of the distribution of laptops with a powerful CPU and at least 8GB of RAM
plt.figure(figsize=(10, 6))
if not laptop_counts.empty:
    # Plotting an empty Series can raise; in that case keep the empty, titled axes.
    laptop_counts.plot(kind="bar")
plt.title("Distribution of Laptops with Powerful CPU and at least 8GB RAM")
plt.xlabel("Manufacturer")
plt.ylabel("Number of Laptops")
# plt.show()

print(laptop_counts)

# Make sure the output directory exists before writing any result files.
os.makedirs("./ref_result", exist_ok=True)

# The context manager closes the file handle even if pickling fails.
with open("./ref_result/laptop_counts.pkl", "wb") as counts_file:
    pickle.dump(laptop_counts, counts_file)

plt.savefig('./ref_result/powerful_laptops_distribution.png')
# plt.show()
